library(tidyverse)
library(estimatr)
library(haven)
library(viridis)
library(scales)
library(sandwich)
library(lmtest)

df <- read_dta('Data/Reanalysis/jopdataFINAL.dta')

df1 <- df %>%
  mutate(
    distance = ifelse(distance_pct >= 0.5, -1 * abs(distance_pct - 1), distance_pct),
    election = ifelse(distance_pct >= 0.5, 0, 1))


model1 <- glm(pty_close2cat ~ 0 + distance * election * newname + female + age + 
                urban + high_ed, 
              data = df1, weights = final_weight, family = binomial(link = "logit"))

model1_tidy <- tidy(model1)
model1_names <- model1_tidy[264:348,]

countries <- model1_names %>% filter(!is.na(estimate))
countries <- countries$term
countries <- str_remove(countries, 'distance:election:newname')

df2 <- df1 %>% filter(newname %in% countries)

model_sub <- glm(pty_close2cat ~ 0 + distance * election * newname + female + age + urban + high_ed, 
                 data = df2, weights = final_weight, family = binomial(link = "logit"))


n1 <- ggeffects::ggpredict(model_sub, terms = c('distance [all]','newname','election'), vcov_fun = "vcovCL", vcov_type = "HC0",
                           vcov_args = list(cluster = df2$newname))

data1a <- n1 %>% filter(facet == 0 & x <= 0)
data1b <- n1 %>% filter(facet == 1 & x >= 0)

final <- rbind(data1a,data1b) %>% as_tibble()

final_post <- final %>% filter(x > 0)

final_post <- final_post %>% group_by(group, facet) %>% arrange(group, facet) %>% 
  mutate(conf.low = modelbased::smoothing(conf.low, method = "loess", strength = 0.4),
         conf.high = modelbased::smoothing(conf.high, method = "loess", strength = 0.4))

df_slope <- final_post %>% filter(x == 0.001 | x == .5) %>% group_by(group) %>% 
  mutate(lag = lag(predicted)-predicted) %>% select(lag, group) %>% 
  drop_na()

neutral <- df_slope %>% filter((lag < .05 & lag >= 0)|(lag > -.05 & lag <= 0)) %>% 
  select(group) %>% unlist() %>% as.vector()

negative <- df_slope %>% filter((lag > .05 & lag >= 0)) %>% 
  select(group) %>% unlist() %>% as.vector()

positive <- df_slope %>% filter((lag < -.05 & lag <= 0)) %>% 
  select(group) %>% unlist() %>% as.vector()

final_post <- final_post %>% mutate(type = case_when(
  group %in% negative ~ 'Negative (41% of countries)',
  group %in% positive ~ 'Positive (35% of countries)',
  group %in% neutral ~ 'Neutral (24% of countries)'))


myCol1 <- viridis(n =21, option = 'C')
myCol2 <- viridis(n =12, option = 'C')
myCol3 <- viridis(n =18, option = 'C')

#factor(final_post$group, levels = c(negative, neutral, positive))

final_plot <- ggplot(final_post)+
  geom_line(aes(x=x, y = predicted, color = group), linewidth = 1)+
  geom_vline(xintercept = 0, color = 'red', linetype=2)+
  facet_wrap(~type)+
  theme_bw()+
  theme(legend.position="none")+
  scale_colour_manual(values=c(myCol1, myCol2, myCol3))+
  labs(x="Distance after election",
       y='Predictable Probability of Partisanship')+
  theme(axis.text = element_text(size = 8),
        axis.title = element_text(size = 9))

#ggsave(final_plot, file = 'final_plot_mich.png', units = 'in', width = 8, height = 5, dpi = 350)

m1<-coeftest(model_sub, vcov. = vcovCL(model_sub, cluster = df2$ctryrd, type = "HC0"))

tidy_sub <- tidy(m1)
tvalues <- tidy_sub[159:208,]$statistic

hist_t <- ggplot()+
  geom_histogram(aes(tvalues), binwidth = 2, fill = 'slateblue', color = 'black')+
  theme_bw()+
  labs(x = 't-statistic',
       y = 'Count')



StatBin2 <- ggproto(
  "StatBin2", 
  StatBin,
  compute_group = function (data, scales, binwidth = NULL, bins = NULL, 
                            center = NULL, boundary = NULL, 
                            closed = c("right", "left"), pad = FALSE, 
                            breaks = NULL, origin = NULL, right = NULL, 
                            drop = NULL, width = NULL) {
    if (!is.null(breaks)) {
      if (!scales$x$is_discrete()) {
        breaks <- scales$x$transform(breaks)
      }
      bins <- ggplot2:::bin_breaks(breaks, closed)
    }
    else if (!is.null(binwidth)) {
      if (is.function(binwidth)) {
        binwidth <- binwidth(data$x)
      }
      bins <- ggplot2:::bin_breaks_width(scales$x$dimension(), binwidth, 
                                         center = center, boundary = boundary, 
                                         closed = closed)
    }
    else {
      bins <- ggplot2:::bin_breaks_bins(scales$x$dimension(), bins, 
                                        center = center, boundary = boundary, 
                                        closed = closed)
    }
    res <- ggplot2:::bin_vector(data$x, bins, weight = data$weight, pad = pad)
    
    # drop 0-count bins completely before returning the dataframe
    res <- res[res$count > 0, ] 
    
    res
  })

tvalues <- as_tibble(tvalues)

hist_t <- ggplot()+
  geom_histogram(data = tvalues, aes(value, after_stat(count)),
                 binwidth = 2, fill = 'slateblue', color = 'black', stat = StatBin2)+
  theme_bw()+
  geom_density(data = tvalues, aes(value, after_stat(count)))+
  geom_vline(aes(xintercept=c(-1.96, 1.96)), color = 'red', linetype = 'dashed')+
  xlim(-130,130)+
  geom_text(aes(x=20, y=7), label="italic('t =')~ 1.96", color = 'red', parse = TRUE)+
  geom_text(aes(x=-25, y=7), label="italic('t =')~ -1.96", color = 'red', parse = TRUE)+
  labs(x = 't-statistic',
       y = 'Count')+
  theme(plot.margin = margin(.2,3,.2,3, "cm"))+
  theme(axis.text = element_text(size = 8),
        axis.title = element_text(size = 9))

#ggsave(hist_t, file = 'hist_tstat.png', units = 'in', width = 8, height = 5, dpi = 350)


final_fig1 <- ggpubr::ggarrange(final_plot, hist_t, 
                                labels = c("A", "B"), 
                                ncol = 1, nrow = 2, heights = c(1.6,1),
                                font.label = list(size = 9, color = "black", face = "bold"))
suppressMessages(print(final_fig1))
#ggsave(final_fig1, file = 'fig1_final.png', units = 'in', width = 7.25, height = 6.5, dpi = 350)
